"""
解析PRL列表
"""

from html.parser import HTMLParser

class PRLParser(HTMLParser):
    """Collect article ids and editor suggestions from one page section.

    The parser stays idle until it meets the anchor
    ``<a name="sect-letters-condensed-matter-and-materials">``.  From then
    on, and until the next ``</section>`` end tag, it records:

    * the ``data-id`` of every ``<div>`` whose ``class`` starts with
      ``"article"`` (appended to ``article_list`` in document order);
    * the first text chunk following a ``<div>`` whose ``class`` starts
      with ``"teaser"`` (stored in ``editor_suggestion`` under the id of
      the most recently seen article).

    Attributes:
        article_list: ``data-id`` values collected so far.
        editor_suggestion: Mapping of article id to its teaser text.

    Raises:
        ValueError: From ``feed()`` when an article ``<div>`` inside the
            section carries no ``data-id`` attribute.
    """

    # Value of the ``name`` attribute on the anchor that opens the section.
    _SECTION_ANCHOR = "sect-letters-condensed-matter-and-materials"

    def __init__(self, *args, **kwargs):
        """Initialise the base parser and fresh per-instance result state.

        All arguments are forwarded unchanged to ``HTMLParser.__init__``.
        """
        super().__init__(*args, **kwargs)
        # True while we are inside the section of interest.
        self._trigger = False
        # True when the next text chunk is an editor-suggestion teaser.
        self._suggest = False
        # Created per instance: class-level containers would be shared by
        # every parser and leak results from one parse into the next.
        self.article_list = []
        self.editor_suggestion = {}

    def handle_starttag(self, tag: str, attrs: list) -> None:
        """Track the section anchor, article divs and teaser divs.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: ``(name, value)`` pairs of the tag's attributes.

        Raises:
            ValueError: If an article ``<div>`` has no ``data-id``.
        """
        # Look attributes up by name so that neither their order nor an
        # empty attribute list can cause an IndexError.
        attr_map = dict(attrs)

        # Step 1: wait for the anchor that marks the start of the section.
        if not self._trigger:
            if tag == "a" and attr_map.get("name") == self._SECTION_ANCHOR:
                self._trigger = True
            return

        # Step 2: inside the section only <div> elements are of interest.
        if tag != "div":
            return

        # A valueless ``class`` attribute is reported as None.
        css_class = attr_map.get("class") or ""
        if css_class.startswith("article"):
            if "data-id" not in attr_map:
                raise ValueError("not data-id")
            self.article_list.append(attr_map["data-id"])
        elif css_class.startswith("teaser") and self.article_list:
            # Editor suggestion: the following text belongs to the latest
            # article.  Ignored when no article has been seen yet, since
            # there is nothing to attach the text to.
            self._suggest = True

    def handle_endtag(self, tag: str) -> None:
        """Stop collecting once the enclosing ``</section>`` is reached."""
        if self._trigger and tag == "section":
            self._trigger = False
            # Drop a pending teaser so text outside the section is never
            # recorded as an editor suggestion.
            self._suggest = False
            print("End of a ", tag)

    def handle_data(self, data: str) -> None:
        """Store the text chunk that follows a teaser ``<div>``.

        Args:
            data: Text content supplied by ``HTMLParser``.
        """
        if not self._suggest:
            return
        self._suggest = False
        if self.article_list:
            self.editor_suggestion[self.article_list[-1]] = data